Intro to Stan & Generalised Linear Modelling

Matthew Talluto

28.11.2022

What is Stan?

What is Stan?

  1. Write a Stan model in a .stan file
  2. Prepare all data in R
  3. Use the rstan package to invoke the Stan interpreter
    • Translates your model into a C++ program w/ an HMC sampler, then compiles for your computer
  4. Run the program from R using rstan.
  5. Perform posterior inference using various R packages.

Representing models in Stan

mu = a + b*x;
y ~ normal(mu, s);
a ~ normal(0, 20);
b ~ normal(0, 5);
s ~ exponential(0.2);

Why Stan?

Writing models - data block

## R version
log_posterior = function(params, data) {
   y = data[['y']]
   x = data[['x']]
}
// Stan version
data {
   int n; // a single integer named n, number of data points
   vector [n] y; // a vector of n real numbers, named y
   vector [n] x;
}

Writing models - parameters block

## R version
log_posterior = function(params, data) {
   y = data[['y']]
   x = data[['x']]
   a = params['a']
   b = params['b']
   s = params['s']

   if(s <= 0)
      return(-Inf)
}
// Stan version
data {
   int n; // a single integer named n, number of data points
   vector [n] y; // a vector of n real numbers, named y
   vector [n] x;
}
parameters {
   real a;
   real b;
   real <lower=0> s; // a single real number that must be positive
}

Writing models - transformed parameters block

## R version
log_posterior = function(params, data) {
   y = data[['y']]
   x = data[['x']]
   a = params['a']
   b = params['b']
   s = params['s']

   if(s <= 0)
      return(-Inf)
   
   mu = a + b * x
}
// Stan version
data {
   int n; // a single integer named n, number of data points
   vector [n] y; // a vector of n real numbers, named y
   vector [n] x;
}
parameters {
   real a;
   real b;
   real <lower=0> s; // a single real number that must be positive
}
transformed parameters {
    // linear predictor; fixed a * b * x -> a + b * x to match the R version
    vector [n] mu = a + b * x;
    // or another example
    // vector [n] log_x = log(x);
}

Writing models - model block

## R version
log_posterior = function(params, data) {
   y = data[['y']]
   x = data[['x']]
   a = params['a']
   b = params['b']
   s = params['s']

   if(s <= 0)
      return(-Inf)
   
   mu = a + b * x
   log_lhood = sum(dnorm(y, mu, s, log = TRUE))
}
// Stan version
data {
   int n; // a single integer named n, number of data points
   vector [n] y; // a vector of n real numbers, named y
   vector [n] x;
}
parameters {
   real a;
   real b;
   real <lower=0> s; // a single real number that must be positive
}
transformed parameters {
    // linear predictor; fixed a * b * x -> a + b * x to match the R version
    vector [n] mu = a + b * x;
}
model {
    y ~ normal(mu, s);
}

Writing models - model block

## R version
log_posterior = function(params, data) {
   # unpack data and named parameters
   y = data[['y']]
   x = data[['x']]
   a = params['a']
   b = params['b']
   s = params['s']

   # the standard deviation must be positive; reject impossible values
   if(s <= 0)
      return(-Inf)
   
   mu = a + b * x
   log_lhood = sum(dnorm(y, mu, s, log = TRUE))
   # fixed: the slope prior must use b (the undefined B was a typo)
   log_prior = dnorm(a, 0, 10, log=TRUE) + 
        dnorm(b, 0, 5, log=TRUE) + 
        dexp(s, 0.1, log=TRUE)
   return(log_lhood + log_prior)
}
// Stan version
data {
   int n; // a single integer named n, number of data points
   vector [n] y; // a vector of n real numbers, named y
   vector [n] x;
}
parameters {
   real a;
   real b;
   real <lower=0> s; // a single real number that must be positive
}
transformed parameters {
    // linear predictor; fixed a * b * x -> a + b * x to match the R version
    vector [n] mu = a + b * x;
}
model {
    y ~ normal(mu, s);
    a ~ normal(0, 10);
    b ~ normal(0, 5);
    s ~ exponential(0.1);
}

Save this code to a file. For example: stan/sample_stan_lm.stan

Fitting models

## R version
# get the data and format it
library(data.table)
Howell1 = fread("https://github.com/rmcelreath/rethinking/raw/master/data/Howell1.csv")
mh_data = list(
    y = Howell1$height,  # fixed: comma was missing after this element
    x = Howell1$weight
)
# load the metropolis sampler from this course
source("r/mh.r")

# take 10000 samples, with adaptation
samples = metropolis(log_posterior, initial = c(a = 0, b = 0, s = 1), 
                     data = mh_data)
## rstan version
library(rstan)
library(data.table)

# get the data and format it for rstan
Howell1 = fread("https://github.com/rmcelreath/rethinking/raw/master/data/Howell1.csv")
stan_data = list(
    y = Howell1$height,
    x = Howell1$weight,
    n = length(Howell1$weight)
)
# compile the model
model = stan_model(file = "stan/sample_stan_lm.stan")

# take 5000 samples
# fixed: the data list created above is named stan_data, not standata
samples = sampling(model, iter=5000, data = stan_data)

Variables in Stan

Scalar types

int my_int;
real my_real;
real <lower=0> my_positive_real;
real <lower=0, upper=1> my_proportion;

Variables in Stan - containers

Vectors

vector <constraint> [length] name

vector [10] my_ten_reals;
vector <lower=0> [n] n_positive_numbers;

Variables in Stan - containers

Vectors

vector <constraint> [length] name

Matrices

matrix <constraint> [rows, columns] name

vector [10] my_ten_reals;
vector <lower=0> [n] n_positive_numbers;
matrix [n,k] my_matrix;

Variables in Stan - containers

Vectors

vector <constraint> [length] name

Matrices

matrix <constraint> [rows, columns] name

Arrays

type <constraint> name [dimensions]

vector [10] my_ten_reals;
vector <lower=0> [n] n_positive_numbers;
matrix [n,k] my_matrix;
int ten_integers [10]; // array of ints
real ten_reals [10]; // array of reals
vector <lower=0> [10] array_of_vectors [5]; // 5 vectors, each of length 10

German Tank problem in Stan

  1. Recall the German tank problem presented in lecture. Use the following captured serial numbers:

s = c(147, 126, 183, 88, 9, 203, 16, 10, 112, 205)

\[ \begin{aligned} s & \sim \mathrm{U}(1, N_{max}) \\ N_{max} & \sim \mathrm{Gamma}(\alpha=0.001, \beta=0.001) \end{aligned} \]

German Tank problem: Stan model

data {
   int <lower = 1> n; // number of data points
   vector <lower=1> [n] s; // captured serial numbers
}
parameters {
   real <lower = max(s)> Nmax;
}
model {
   s ~ uniform(1, Nmax); // likelihood
   // this is a super vague prior!
   Nmax ~ gamma(0.001, 0.001);  
}
library(rstan)
 # the code above is saved in stan/tanks.stan
tanks_mod = stan_model("stan/tanks.stan")

German Tank problem: MAP estimate

s = c(147, 126, 183, 88, 9, 203, 16, 10, 112, 205)
standat = list(n = length(s), s = s)
fit_map = optimizing(tanks_mod, data = standat)
fit_map$par
## Nmax 
##  205

As before, the MAP is identical to max(s), but many other values are possible!

German Tank problem: Sampling

# parameter explanations:
#   iter: how many iterations (you will get iter/2 samples)
#   chains: how many parallel MCMC chains?
#   refresh: if 0, stan won't show you a progress bar (nice for rmd files)
fit = sampling(tanks_mod, data = standat, iter = 5000, chains = 1, refresh = 0)
samples = as.matrix(fit)
head(samples)
##           parameters
## iterations     Nmax      lp__
##       [1,] 226.1407 -56.75793
##       [2,] 226.1407 -56.75793
##       [3,] 211.6211 -57.17141
##       [4,] 206.6496 -58.29355
##       [5,] 206.9084 -58.16191
##       [6,] 205.5159 -59.39410

source("../r/metrop.r") ## for HPDI (highest posterior density interval, hpdi())
rbind(hdpi = hpdi(samples[,1]), 
      quantile = quantile(samples[,1], c(0.05, 0.95)))
##                5%      95%
## hdpi     205.0023 260.3608
## quantile 205.8477 279.2651
library(bayesplot)
mcmc_combo(samples, c("hist", "trace"), pars = "Nmax")

More linear models

More linear models

\[ \mathbb{E}(y) = a + b_1x + b_2x^2 \]

Categorical Variables

Howell1$age_group = cut(Howell1$age, breaks = c(-1, 12, 22, 200), 
                        labels = c("child", "young", "adult"))
Howell1
##       height    weight age male age_group
##   1: 151.765 47.825606  63    1     adult
##   2: 139.700 36.485806  63    0     adult
##   3: 136.525 31.864838  65    0     adult
##   4: 156.845 53.041914  41    1     adult
##   5: 145.415 41.276872  51    0     adult
##  ---                                     
## 540: 145.415 31.127751  17    1     young
## 541: 162.560 52.163080  31    1     adult
## 542: 156.210 54.062496  21    0     young
## 543:  71.120  8.051258   0    1     child
## 544: 158.750 52.531623  68    1     adult

Categorical Variables

Howell1$is_child = ifelse(Howell1$age_group == "child", 1, 0)
Howell1$is_young = ifelse(Howell1$age_group == "young", 1, 0)
Howell1
##       height    weight age male age_group is_child is_young
##   1: 151.765 47.825606  63    1     adult        0        0
##   2: 139.700 36.485806  63    0     adult        0        0
##   3: 136.525 31.864838  65    0     adult        0        0
##   4: 156.845 53.041914  41    1     adult        0        0
##   5: 145.415 41.276872  51    0     adult        0        0
##  ---                                                       
## 540: 145.415 31.127751  17    1     young        0        1
## 541: 162.560 52.163080  31    1     adult        0        0
## 542: 156.210 54.062496  21    0     young        0        1
## 543:  71.120  8.051258   0    1     child        1        0
## 544: 158.750 52.531623  68    1     adult        0        0

Categorical Variables

data {
   int n; 
   vector [n] height; 
   vector [n] weight;
   vector  <lower = 0, upper = 1> [n] is_child;
   vector  <lower = 0, upper = 1> [n] is_young;
}
parameters {
   real a; // default intercept (for adults)
   real a_child; // additional effect of being a child
   real a_young; // additional effect of being young
   real b;
   real <lower=0> s;
}
transformed parameters {
    // group-specific intercept for each observation
    vector [n] intercept = a + a_child * is_child + a_young * is_young;
    // fixed: the predictor declared in the data block is weight, not x
    vector [n] mu = intercept + b * weight;
}
model {
    // fixed: the response declared in the data block is height, not y
    height ~ normal(mu, s);
    a ~ normal(0, 10);
    a_child ~ normal(0, 10);
    a_young ~ normal(0, 10);
    b ~ normal(0, 5);
    s ~ exponential(0.1);
}

Categorical Variables

data {
   int n; // number of observations
   vector [n] height; // response
   vector [n] weight; // predictor
   vector <lower = 0, upper = 1> [n] is_child; // fixed missing space before is_child
   vector <lower = 0, upper = 1> [n] is_young;
}
parameters {
   real a; // default intercept (for adults)
   real a_child; // additional effect of being a child
   real a_young; // additional effect of being young
   real b; // default slope (for adults)
   real b_child; // additional slope effect of being a child
   real b_young; // additional slope effect of being young
   real <lower=0> s;
}
transformed parameters {
    vector [n] intercept = a + a_child * is_child + a_young * is_young;
    vector [n] slope = b + b_child * is_child + b_young * is_young;
    // fixed: slope and weight are both vectors, so the product must be
    // elementwise (.*); also the predictor is weight, not the undeclared x
    vector [n] mu = intercept + slope .* weight;
}
model {
    // fixed: the response declared in the data block is height, not y
    height ~ normal(mu, s);
    a ~ normal(0, 10);
    a_child ~ normal(0, 10);
    a_young ~ normal(0, 10);
    b ~ normal(0, 5);
    b_child ~ normal(0, 5);
    b_young ~ normal(0, 5);
    s ~ exponential(0.1);
}

Simplifying our notation for E(y)

\[ \mathbb{E}(y) = a + \mathbf{B}\mathbf{X} \]

X = cbind("weight" = Howell1$weight, 
          "weight^2" = Howell1$weight^2, 
          "child" = Howell1$is_child, 
          "young" = Howell1$is_young, 
            # we need separate variables for interactions
          "weight × child" = Howell1$weight * Howell1$is_child,
          "weight × young" = Howell1$weight * Howell1$is_young)
head(X)
##        weight weight^2 child young weight × child weight × young
## [1,] 47.82561 2287.289     0     0              0              0
## [2,] 36.48581 1331.214     0     0              0              0
## [3,] 31.86484 1015.368     0     0              0              0
## [4,] 53.04191 2813.445     0     0              0              0
## [5,] 41.27687 1703.780     0     0              0              0
## [6,] 62.99259 3968.066     0     0              0              0

Simplifying our notation for E(y)

\[ \begin{aligned} \mathbb{E}(y) & = a + \mathbf{B}\mathbf{X} \\ y & \sim \mathcal{N}(\mathbb{E}(y), s) \end{aligned} \]

# fixed spelling: log_likelihood (was log_liklihood)
log_likelihood = function(params, data) {
    a = params[1]                 # intercept
    s = params[2]                 # residual standard deviation
    B = params[3:length(params)]  # vector of slopes, one per column of X
    ## %*% is matrix multiplication, returns a vector length n
    mu = a + data$X %*% B  
    sum(dnorm(data$height, mu, s, log=TRUE))
}
data {
   int n; 
   int k;
   vector [n] height; 
   matrix [n, k] X;
}
parameters {
   real a;
   vector [k] B;
   real <lower=0> s;
}
transformed parameters {
    vector [n] mu = a + X * B;
}
model {
    // fixed: the response declared in the data block is height, not y
    height ~ normal(mu, s);
    a ~ normal(0, 10);
    B ~ normal(0, 5); // prior is the same for the whole vector
    s ~ exponential(0.1);
}

Changing the distribution of y

\[ \begin{aligned} \mathbb{E}(y) & = a + \mathbf{B}\mathbf{X} \\ \rho & = \mathcal{f}(\mathbb{E}(y), \ldots) \\ y & \sim \mathcal{D}(\rho) \end{aligned} \]

Example: Tree mortality as a function of temperature

tsuga = readRDS("exercises/data/tsuga.rds")
tsuga[, 4:8]
##       born died n annual_mean_temp tot_annual_pp
##    1:    1    0 0         3.414667     1085.2000
##    2:    0    3 5         3.849333     1003.0000
##    3:    3    0 6         3.452000     1076.1333
##    4:    1    0 3         3.620000     1099.4000
##    5:    0    4 4         4.596000      989.4667
##   ---                                           
## 1538:    0    1 1         4.329333     1234.0000
## 1539:    1    0 3         4.134000     1191.0000
## 1540:    0    1 4         4.372000     1207.2667
## 1541:    0    1 2         4.247333     1220.2667
## 1542:    0    1 4         4.283333     1200.9333

Tree mortality log posterior

\[ \begin{aligned} \mathbb{E}(n_{died}/n_{total}) &= p = a + bT \\ n_{died} & \sim \mathrm{Binomial}(n_{total}, p) \end{aligned} \]

data {
   int n; // number of plots
   int n_died [n]; // number of dead trees in each plot
   int n_total [n]; // number of trees in each plot 
   vector [n] annual_mean_temp; // temperature
}
parameters {
   real a;
   real b;
   // no s: there is no variance parameter
   // for the binomial distribution!
}
transformed parameters {
    vector [n] p = a + b * annual_mean_temp;
}
model {
    n_died ~ binomial(n_total, p);
    a ~ normal(0, 10);
    b ~ normal(0, 5); 
}

Fitting the model

log_posterior = function(par, dat) {
    # the probability of death changes with temperature
    p = par['a'] + par['b'] * dat$annual_mean_temp
    
    # likelihood
    logpr = sum(dbinom(dat$died, dat$n, p, log=TRUE))
    
    # priors
    logpr + dnorm(par['a'], 0, 20, log=TRUE) + 
        dnorm(par['b'], 0, 5, log=TRUE)
}

fit = optim(c(a=0.5, b=0), log_posterior, dat = tsuga[n > 0], 
            method = "Nelder-Mead", control=list(fnscale=-1))
## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

## Warning in dbinom(dat$died, dat$n, p, log = TRUE): NaNs produced

Binomial-logistic regression - Stan

// saved in file stan/tree_binom.stan
data {
   int n; // number of plots
   int n_died [n]; // number of dead trees in each plot
   int n_total [n]; // number of trees in each plot 
   vector [n] annual_mean_temp; // temperature
}
parameters {
   real a;
   real b;
   // no s: there is no variance parameter
   // for the binomial distribution!
}
transformed parameters {
    vector [n] p = inv_logit(a + b * annual_mean_temp);
}
model {
    n_died ~ binomial(n_total, p);

    // priors are the same regardless
    a ~ normal(0, 10);
    b ~ normal(0, 5); 
}
mod = stan_model("stan/tree_binom.stan")
# fixed: sample from the compiled model object mod (not the rstan function
# stan_model), and supply the data list required by the model's data block
fit = sampling(mod, data = standat, iter = 5000)

Binomial-logistic regression - R/optim

log_posterior = function(par, dat) {
    # the probability of death changes with temperature
    p = plogis(par['a'] + par['b'] * dat$annual_mean_temp)
    
    # likelihood
    logpr = sum(dbinom(dat$died, dat$n, p, log=TRUE))
    
    # priors
    logpr + dnorm(par['a'], 0, 20, log=TRUE) + 
        dnorm(par['b'], 0, 5, log=TRUE)
}

fit = optim(c(a=0.5, b=0), log_posterior, dat = tsuga[n > 0], 
            method = "Nelder-Mead", control=list(fnscale=-1), 
            hessian = TRUE)
vcv = solve(-fit$hessian)
fit$par
##          a          b 
## -1.0266212 -0.1291986

Count Data: Poisson

\[ \begin{aligned} \lambda & = \exp(a + \mathbf{BX}) \\ y & \sim \mathrm{Poisson}(\lambda) \end{aligned} \]

Constraint: \(\sigma^2_{y|\mathbf{X}} = \mathbb{E}(y|\mathbf{X}) = \lambda\)

\[ y \sim \mathrm{Poisson}(u\lambda) \]

Overdispersed counts: Negative Binomial

\[ \begin{aligned} \mu & = \exp(a + \mathbf{BX}) \\ y & \sim \mathrm{NB}(\mu, \phi) \end{aligned} \]

\(\phi\) is called the dispersion parameter, and is related to the variance:

\[ \sigma^2_{y|\mathbf{X}} = \mu + \frac{\mu^2}{\phi} \]

Proportions: Beta

\[ \begin{aligned} \mu & = \mathrm{logit}^{-1}(a_\rho + \mathbf{B_\rho X_\rho}) \\ \phi & = \exp(a_\phi + \mathbf{B_\phi X_\phi}) & \mathrm{\small optionally} \\ \alpha & = \mu\phi \\ \beta & = (1 - \mu)\phi \\ y & \sim \mathrm{Beta}(\alpha, \beta) \end{aligned} \]

Caveats & Hints

Continuous, strictly positive: Gamma

\[ \begin{aligned} \mu & = \frac{1}{(a + \mathbf{B X})} & OR \\ \mu & = \exp(a + \mathbf{B X}) \\ \\ \alpha & = \frac{\mu^2}{\phi} \\ \beta & = \frac{\mu}{\phi} \\ y & \sim \mathrm{Gamma}(\alpha, \beta) \end{aligned} \]

Bayesian analysis workflow

  1. Specify joint posterior graphically, mathematically, and in code

Bayesian analysis workflow

  1. Specify joint posterior graphically, mathematically, and in code
  2. Draw samples from the joint posterior distribution

Bayesian analysis workflow

  1. Specify joint posterior graphically, mathematically, and in code
  2. Draw samples from the joint posterior distribution
  3. Evaluate/diagnose the model’s performance

Bayesian analysis workflow

  1. Specify joint posterior graphically, mathematically, and in code
  2. Draw samples from the joint posterior distribution
  3. Evaluate/diagnose the model’s performance
  4. Perform posterior inference

1. Joint posterior: likelihood

1. Joint posterior: likelihood

1. Joint posterior: priors

1. Joint posterior: priors

GLM in Stan

For this exercise, you will use the birddiv (in docs/exercises/data/birddiv.csv) dataset; you can load it directly from github using data.table::fread(). Bird diversity was measured in 1-km^2 plots in multiple countries of Europe, investigating the effects of habitat fragmentation and productivity on diversity. We will consider a subset of the data. Specifically, we will ask how various covariates are associated with the diversity of birds specializing on different habitat types. The data have the following potential predictors:

All of the above variables are standardized to a 0-100 scale. Consider this when choosing priors.

Your response variable will be richness, the bird species richness in the plot. Additionally, you have an indicator variable hab_type. This is not telling you what habitat type was sampled (plots included multiple habitats). Rather, this is telling you what type of bird species were counted for the richness measurement: so hab_type == "forest" & richness == 7 indicates that 7 forest specialists were observed in that plot.

Build one or more generalised linear models for bird richness. Your task should be to describe two things: (1) how does richness vary with climate, productivity, fragmentation, or habitat diversity, and (2) do these relationships vary depending on what habitat bird species specialize on.

1. Specify joint posterior

transformed parameters {
    lambda = exp(a + X * B);
}
model {
    richness ~ poisson(lambda);
    a ~ normal(0, 10);
    B ~ normal(0, 5);
}

1. Specify joint posterior

data {
    int <lower=0> n; // number of data points
    int <lower=0> k; // number of x-variables
    int <lower=0> richness [n];
    matrix [n,k] X;
}
parameters {
    real a;
    vector [k] B;
}
transformed parameters {
    vector <lower=0> [n] lambda;
    lambda = exp(a + X * B);
}
model {
    richness ~ poisson(lambda);
    a ~ normal(0, 10);
    B ~ normal(0, 5);
}
generated quantities {
    int r_predict [n]; // posterior predictive draws of richness
    for(i in 1:n)
        r_predict[i] = poisson_rng(lambda[i]);
    // removed stray duplicate line that re-assigned r_predict after the loop
}
bird_glm = stan_model("stan/bird_glm.stan")

2. Sample from joint posterior

2. Sample from the joint posterior

library(data.table)
birds = fread("exercises/data/birddiv.csv")

# stan can't handle NAs
birds = birds[complete.cases(birds)]

# original variables scaled from 0-100; rescale to go from 0 to 1
X_scaled = as.matrix(birds[,c(2:7)])/100

# I will try two models
# First, simple model, only forest birds in relation to forest cover
for_i = which(birds$hab_type == "forest")
standat1 = list(
    n = length(for_i), 
    k = 1,
    richness = birds$richness[for_i],
    X = X_scaled[for_i, "NDVI", drop=FALSE])

fit1 = sampling(bird_glm, data = standat1, iter=2000, 
   chains = 4, refresh = 0)
# Second, looking at how two variables influence birds of different types

# grab two variables
X = X_scaled[, c("For.cover", "NDVI")]

# add a categorical variable for bird type
X = cbind(X, open=ifelse(birds$hab_type == "open",1, 0))
X = cbind(X, generalist=ifelse(birds$hab_type == "generalist",1, 0))

# add interaction terms with the categories
X = cbind(X, op_forCov=X[,1]*X[,3], op_NDVI=X[,1]*X[,4], 
   ge_forCov=X[,2]*X[,3], ge_NDVI=X[,2]*X[,4])

head(X)
##      For.cover       NDVI open generalist op_forCov op_NDVI ge_forCov ge_NDVI
## [1,]  0.998675 0.60381356    0          0         0       0         0       0
## [2,]  0.000000 0.22881356    0          0         0       0         0       0
## [3,]  0.382825 0.11864407    0          0         0       0         0       0
## [4,]  0.114125 0.19067797    0          0         0       0         0       0
## [5,]  0.000000 0.02118644    0          0         0       0         0       0
## [6,]  1.000000 0.54025424    0          0         0       0         0       0

standat2 = with(birds, list(
    n = length(richness), 
    k = ncol(X),
    richness = richness,
    X = X))
fit2 = sampling(bird_glm, data = standat2, iter=2000, 
   chains = 4, refresh = 0)

3. Evaluate the model fit

## Use as.array if you want to keep different mcmc chains separate
## This is ideal for diagnostics
## For inference, you usually want to lump all chains
## In this case, you use as.matrix
samp1_pars = as.array(fit1, pars=c('a', 'B'))
mcmc_combo(samp1_pars, c("hist", "trace"))

print(fit1, pars = c('a', 'B'))
## Inference for Stan model: 5c0b16bbee48bcf4a957b09a484d1746.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##      mean se_mean   sd 2.5%  25%  50%  75% 97.5% n_eff Rhat
## a    1.55    0.00 0.09 1.38 1.49 1.55 1.61  1.73   757 1.01
## B[1] 0.67    0.01 0.15 0.38 0.57 0.67 0.77  0.95   768 1.01
## 
## Samples were drawn using NUTS(diag_e) at Sun Nov 27 10:48:42 2022.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

4. Inference: parameter estimates

mcmc_intervals(samp1_pars)

samp2_pars = as.array(fit2, pars=c('a', 'B'))
mcmc_intervals(samp2_pars)

4. Inference: Retrodiction

# compute posterior distribution of the residual sum of squares
samp1_lam = as.matrix(fit1, pars='lambda')
sq_resid1 = apply(samp1_lam, 1, function(x) (standat1$richness - x)^2)

# compute posterior distribution of dispersion parameter, which is just 
# sum(squared residuals)/(n - k)
# here k is 2, we have an intercept and one slope
# if phi > 1, we have overdispersion and need a better model
phi = apply(sq_resid1, 2, function(x) sum(x)/(length(x) - 2))
quantile(phi, c(0.5, 0.05, 0.95))
##      50%       5%      95% 
## 16.49676 16.39371 16.84566

4. Inference: Improve the model

## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.

# compute posterior distribution of the residual sum of squares
samp2_lam = as.matrix(fit2, pars='lambda')
sq_resid2 = apply(samp2_lam, 1, function(x) (standat2$richness - x)^2)

# compute posterior distribution of dispersion parameter, which is just 
# sum(squared residuals)/(n - k)
# note: this model has an intercept plus 8 slopes, so strictly the divisor
# should be n - 9; the code below reuses n - 2 from the simpler model
# if phi > 1, we have overdispersion and need a better model
phi = apply(sq_resid2, 2, function(x) sum(x)/(length(x) - 2))
quantile(phi, c(0.5, 0.05, 0.95))
##      50%       5%      95% 
## 7.474143 7.381813 7.653988

4. Inference: Partial Response Curves

4. Inference: Response Surfaces